from bokeh.io import show, output_notebook, output_file
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange, Legend, HoverTool, DatetimeTickFormatter, BoxAnnotation, Toggle, CheckboxButtonGroup,CheckboxGroup
from bokeh.models import Label
from bokeh.transform import dodge
from bokeh.layouts import layout, gridplot, row, column, widgetbox
from bokeh.models.widgets import Tabs, Panel
from matplotlib.pyplot import viridis
import bokeh.palettes
from bokeh.embed import file_html
from bokeh.resources import CDN
from IPython.display import HTML
output_notebook()
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import json
from branca.colormap import linear
from folium.features import DivIcon
import folium
import math
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
%matplotlib inline
from bokeh.models import LinearColorMapper
from bokeh.models import ColumnDataSource
from bokeh.models import ColorBar
# Emit an HTML snippet into the notebook output: a jQuery script that hides
# every `div.input` element once the document is ready, plus a submit button
# ("Display code" / "Hide Code") whose form action calls code_toggle() to
# show or hide those elements again. The markup below is a runtime string and
# is passed to IPython's HTML display object unchanged.
HTML('''<script>
$('div.input').hide('500')
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Display code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Display code"></form>''')
%%HTML
<style>
div.prompt {display:none}
</style>
Link to GitHub repo with explainer notebook and data: https://github.com/TueLindhart/Transport_vs_covid19
This interactive visualization investigates how the Covid-19 lockdown in Denmark has affected public transport with busses in Greater Copenhagen. This will be done by answering the following questions:
Let's dive in and answer the first question right away.
The average speed for all busses in Greater Copenhagen is shown on the graph below
Since the lockdown started on March 11th, the average speed has increased on both weekdays and weekends. From March 28th (second step of the lockdown) the average speed stays consistently high, but still at a lower level than the average speed during the Easter holidays. Also, notice how the speed starts to drop again on April 15th, during the first step of the reopening of Denmark.
Try to hover over the graph. Can you figure out why the average speed fluctuates?
# --- Daily average bus speed, with the lockdown phases shaded ---------------
path = './Data/'

# The first CSV column is used as the index and parsed to datetimes so the
# figure can use a datetime x-axis.
allspeed = pd.read_csv(os.path.join(path, "speedPerDay.csv"), sep=";", index_col=0)
allspeed.index = pd.to_datetime(allspeed.index)
source = ColumnDataSource(allspeed)

p = figure(
    plot_width=775,
    plot_height=300,
    x_axis_type="datetime",
    toolbar_location=None,
    y_range=(25, 32),
)
p.title.text = 'Avg. speed of public busses per day in Copenhagen'
p.yaxis.axis_label = 'km/t'

color = bokeh.palettes.Category20[3]
legend_it = []  # not referenced again in this cell
line_color = color[1]
r = p.line(
    "Date",
    "Hast",
    source=source,
    line_width=2,
    color=line_color,
    alpha=0.8,
    muted_color=line_color,
    muted_alpha=0.2,
    visible=True,
)

# Hover follows the cursor vertically (vline) and is attached to the line only.
hover = HoverTool(
    tooltips=[
        ('Speed: ', '@Hast km/t'),
        ("Date: ", '@Date{%d-%m}'),
        ("Day: ", '@DayOfWeek'),
    ],
    formatters={'@Date': 'datetime'},
    renderers=[r],
    mode="vline",
)
p.add_tools(hover)

# Period boundaries used for the shaded boxes.
lockdown1_start, lockdown1_end = pd.to_datetime('20200311'), pd.to_datetime('20200318')
lockdown2_start, lockdown2_end = pd.to_datetime('20200318'), pd.to_datetime('20200415')
easter_start, easter_end = pd.to_datetime('20200404'), pd.to_datetime('20200412')
reopening_start, reopening_end = pd.to_datetime('20200415'), pd.to_datetime('20200421')


def _shaded_period(left, right, fill_color, fill_alpha):
    """Build a visible BoxAnnotation covering the x-range ``left``..``right``."""
    return BoxAnnotation(
        left=left,
        right=right,
        fill_color=fill_color,
        fill_alpha=fill_alpha,
        visible=True,
    )


def _period_label(x, y, text):
    """Build a white, black-bordered 8pt Label placed at screen position (x, y)."""
    return Label(
        x=x,
        y=y,
        x_units='screen',
        y_units='screen',
        text=text,
        render_mode='canvas',
        border_line_color='black',
        border_line_alpha=1.0,
        background_fill_color='white',
        background_fill_alpha=1.0,
        text_font_size='8pt',
    )


lockdown1 = _shaded_period(lockdown1_start, lockdown1_end, 'red', 0.1)
lockdown2 = _shaded_period(lockdown2_start, lockdown2_end, 'red', 0.2)
easter = _shaded_period(easter_start, easter_end, 'yellow', 0.2)
reopening = _shaded_period(reopening_start, reopening_end, 'green', 0.1)
for box in (lockdown1, lockdown2, easter, reopening):
    p.add_layout(box)

# NOTE: a variant with one Toggle widget per box, linked to the box's
# `visible` property via js_link, was left disabled in the original cell.

lockdown1_cit = _period_label(235, 230, 'First step of lockdown')
lockdown2_cit = _period_label(280, 210, 'Second step of lockdown')
easter_cit = _period_label(485, 230, 'Easter Break')
opening_cit = _period_label(600, 230, 'Opening')
for caption in (lockdown1_cit, lockdown2_cit, easter_cit, opening_cit):
    p.add_layout(caption)

show(p)
The map below shows the percentage increase in average speed during the lockdown for each municipality.
The speed has increased in all municipalities, but the increase varies from 0.14% to 8.8%.
Perhaps the increase in average speed for busses is caused by reduced congestion on highways, or by fewer vehicles going through the intricate road networks in each municipality?
-> You can investigate yourself by disabling and enabling the map overlay to reveal the road network beneath.
# --- Choropleth: percentage increase in average bus speed per municipality --
import json

# Municipalities labelled on the map. Order matters: entry i is paired with
# entry i of `coordinates` further down.
sel_mun = ["Albertslund", "Ballerup", "Brøndby", "Dragør", "Frederiksberg", "Gentofte", "Gladsaxe",
           "Glostrup", "Herlev", "Hvidovre", "Høje-Taastrup", "Ishøj", "København",
           "Rødovre", "Tårnby", "Vallensbæk"]

# Municipality polygons. The file is opened explicitly as UTF-8 rather than
# with the platform default encoding, because the municipality names used as
# lookup keys below (e.g. "Brøndby") are non-ASCII.
with open(os.path.join(path, "big_cph_lon_lat.json"), encoding="utf-8") as f:
    big_cph = json.load(f)

speed = pd.read_csv(os.path.join(path, "speedPerMun.csv"), sep=";")
speed = speed.set_index("Date")

# NOTE(review): the Date index is compared with this string directly, which
# presumably relies on ISO-formatted date strings in the CSV - confirm.
LOCKDOWN_START = "2020-03-11"


def _mean_speed(frame, value_column):
    """Return the per-column means of ``frame`` as columns Kommune / ``value_column``."""
    return (
        frame.mean()
        .to_frame(value_column)
        .reset_index()
        .rename(columns={"index": "Kommune"})
    )


before_lockdown = _mean_speed(speed[speed.index < LOCKDOWN_START], "avg_speed_feb")
after_lockdown = _mean_speed(speed[speed.index >= LOCKDOWN_START], "avg_speed_mar")

avg_speed = before_lockdown.merge(after_lockdown, on="Kommune").set_index("Kommune")
avg_speed["per_increase"] = (
    (avg_speed.avg_speed_mar - avg_speed.avg_speed_feb) / avg_speed.avg_speed_feb
) * 100

mun_map = folium.Map(location=[55.676098, 12.568337], zoom_start=11, tiles='OpenStreetMap')
colormap = linear.YlOrRd_04.scale(avg_speed["per_increase"].min(), avg_speed["per_increase"].max())
df_dict = avg_speed["per_increase"]


def _speed_style(feature, colormap=colormap, values=df_dict):
    """Style one GeoJSON feature by its municipality's percentage increase.

    ``colormap`` and ``values`` are bound as defaults on purpose: the style
    function may be called after this cell has finished, and later cells
    rebind the module-level ``colormap`` / ``df_dict`` to other data.
    """
    return {
        'fillColor': colormap(values[feature['properties']['KOMNAVN']]),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 1,
    }


folium.GeoJson(
    big_cph,
    name='Percentage increase in speed after lockdown',
    style_function=_speed_style,
).add_to(mun_map)
folium.LayerControl(position="bottomright", collapsed=False).add_to(mun_map)
colormap.caption = 'Percentage increase in speed'
colormap.add_to(mun_map)

# Label positions as [lat, lon], one per municipality, in the order of `sel_mun`.
coordinates = [[55.693587, 12.344985], [55.734238, 12.364346],
               [55.653268, 12.418906], [55.592974, 12.645970],
               [55.682220, 12.514292], [55.753021, 12.552256],
               [55.747834, 12.472433], [55.671153, 12.398190],
               [55.735531, 12.434457], [55.634567, 12.465933],
               [55.665301, 12.266118], [55.623098, 12.330763],
               [55.711402, 12.533741], [55.682886, 12.448000],
               [55.610549, 12.600898], [55.636898, 12.368548]]
coordinates_dict = dict(zip(sel_mun, coordinates))

# Text-only markers: a DivIcon holding the municipality name.
for m, position in coordinates_dict.items():
    folium.map.Marker(
        position,
        icon=DivIcon(
            icon_size=(150, 36),
            icon_anchor=(25, 0),
            html=f'<div style="font-size: 10pt">{m}</div>',
        ),
    ).add_to(mun_map)

mun_map
#Plan B:
#<img src="https://tuelindhart.github.io/Transport_vs_covid19/Images/Choropleth_speed.png" style="width: 800px;"/>
The speed increase per municipality in the map above is visualized as a bar chart below.
# --- Bar chart: percentage increase in average speed per municipality -------
# Same municipality list as used for the maps; not referenced in this cell.
focus_municipality = ["Albertslund", "Ballerup", "Brøndby", "Dragør", "Frederiksberg", "Gentofte", "Gladsaxe",
                      "Glostrup", "Herlev", "Hvidovre", "Høje-Taastrup", "Ishøj", "København",
                      "Rødovre", "Tårnby", "Vallensbæk"]

df_hast_daily = pd.read_csv(os.path.join(path, "speedPerMun.csv"), sep=";")
df_hast_daily = df_hast_daily.set_index("Date")

# Column-wise means before and from 2020-03-11 (one value per CSV column).
df_avg_hast_pre_lockdown = df_hast_daily[df_hast_daily.index < "2020-03-11"].mean(0)
df_avg_hast_post_lockdown = df_hast_daily[df_hast_daily.index >= "2020-03-11"].mean(0)

# Relative change in percent, indexed by "Municipality".
df_avg_hast = (
    ((df_avg_hast_post_lockdown - df_avg_hast_pre_lockdown) / df_avg_hast_pre_lockdown * 100)
    .to_frame("pct_increase")
    .rename_axis("Municipality")
)

# Bokeh data source; the index is exposed as the 'Municipality' column.
source = ColumnDataSource(df_avg_hast)

# Categorical x-axis factors, in the order of the data source.
x_range = [str(h) for h in source.data['Municipality']]

# A single colour (third entry of the 20-colour Category20 palette) for all bars.
color = bokeh.palettes.Category20[20][2]

# Figure frame with title and axis labels; the toolbar is disabled.
p = figure(x_range=FactorRange(factors=x_range),
           title='Increase in speed per municipality',
           x_axis_label='Municipality',
           y_axis_label='Increase in speed (%)',
           plot_width=700, plot_height=400,
           toolbar_location=None)

r = p.vbar("Municipality", top="pct_increase", source=source, width=0.8,
           fill_alpha=0.5, line_color=None,
           color=color, visible=True)

# Rotate the municipality names 45 degrees so they do not overlap.
p.xaxis.major_label_orientation = math.pi / 4

# Hover shows the bar's percentage increase; attached to the bar renderer only.
hover = HoverTool(
    tooltips=[('Increase in speed (%)', '@pct_increase{0.2f} %')],
    renderers=[r],
)
p.add_tools(hover)

p.y_range.start = 0  # y-axis starts at zero
show(p)
The lockdown has undeniably changed the travel pattern for everyone. Can this be observed in the average number of boarding passengers? This is shown on the barchart below, for the periods February and March for 2019 and 2020. These can be toggled individually to illustrate the difference between the two months for 2019 without lockdown and 2020 with lockdown.
Try to click on February 2020 and March 2020. The number of passengers in March has dropped in all municipalities, and in some places it seems to be cut in half! If you then try to click on February 2019 and March 2019 you can see that the number of passengers is relatively constant. This indicates that the drop in passengers is because of the lockdown and not a seasonal effect.
But how about yearly effects? The launch of a new bus network (Nyt Bynet) in the fall of 2019 because of the opening of the Metro City Ring can have an effect. If you click on February 2019 and February 2020 then you can see that the number of passengers stay mainly the same except in Gladsaxe, Høje-Taastrup, Frederiksberg and Copenhagen (København). This drop could be due to the opening of Nyt Bynet. Therefore, it is best to compare the drop in passengers in the same year.
# --- Bar chart: average boarding passengers, Feb/Mar 2019 vs Feb/Mar 2020 ---
sel_mun = ["Albertslund", "Ballerup", "Brøndby", "Dragør", "Frederiksberg", "Gentofte", "Gladsaxe",
           "Glostrup", "Herlev", "Hvidovre", "Høje-Taastrup", "Ishøj", "København",
           "Rødovre", "Tårnby", "Vallensbæk"]

df = pd.read_csv(
    os.path.join(path, 'Passagertal_SenesteVersion.csv'),
    sep=";",
    thousands=",",
)

# One frame per month holding the "På_" and "Af_" columns; rows with missing
# values are dropped per month.
df_201902 = df[["Kommune", "På_201902", "Af_201902"]].dropna()
df_201903 = df[["Kommune", "På_201903", "Af_201903"]].dropna()
df_202002 = df[["Kommune", "På_202002", "Af_202002"]].dropna()
df_202003 = df[["Kommune", "På_202003", "Af_202003"]].dropna()

# Mean per municipality, restricted to (and ordered by) `sel_mun`; the
# remaining months are joined on the Kommune index one at a time.
passenger_df = df_201902.groupby(["Kommune"]).mean().loc[sel_mun]
for monthly in (df_201903, df_202002, df_202003):
    passenger_df = passenger_df.join(
        monthly.groupby(["Kommune"]).mean().loc[sel_mun],
        on="Kommune",
    )

# Bokeh data source and the categorical x-axis factors taken from it.
source = ColumnDataSource(passenger_df)
x_range = [str(h) for h in source.data['Kommune']]

# Four colours: every second entry of the 8-colour Category20 palette.
colors = bokeh.palettes.Category20[8][0::2]

p = figure(
    x_range=FactorRange(factors=x_range),
    title='Barchart of average number of onboarding passengers',
    x_axis_label='Municipalities',
    y_axis_label='Avg. number of boarding passengers',
    plot_width=950,
    plot_height=400,
    toolbar_location=None,
)

# Boarding columns to plot and their legend texts.
boardings = ["På_201902", "På_201903", "På_202002", "På_202003"]
legend_names = {"På_201902": "February 2019", "På_201903": "March 2019",
                "På_202002": "February 2020", "På_202003": "March 2020", }

# One bar renderer per month, all hidden until selected in the legend.
bar = {}
for indx, i in enumerate(boardings):
    bar[i] = p.vbar(
        "Kommune",
        top=i,
        source=source,
        width=0.8,
        fill_alpha=0.5,
        line_color=None,
        color=colors[indx],
        visible=False,
    )
p.xaxis.major_label_orientation = math.pi / 4

# Legend placed to the right of the plot area.
items = [(legend_names[i], [bar[i]]) for i in boardings]
legend = Legend(items=items, location=(0, 152))
p.add_layout(legend, 'right')

# One hover tool per renderer so each tooltip reads its own column.
for b in bar:
    period = legend_names[b]
    hover = HoverTool(
        tooltips=[('Avg. number of passengers: ', f'@{{{b}}}')],
        renderers=[bar[b]],
    )
    p.add_tools(hover)

p.y_range.start = 0  # y-axis starts at zero
p.legend.click_policy = "hide"
p.legend.title = 'Click on names below to see barcharts'
show(p)
The barchart below takes the data from the barchart above and compares the change in passengers from February to March for 2019 and 2020.
If you click on Mar 2020 / Feb 2020 and hover over Vallensbæk, you see that the number of passengers using the busses in March 2020 was 68.86% of the number in February 2020. For good measure you can compare it with the previous year, Mar 2019 / Feb 2019, and see that the number of passengers in the two months of 2019 is roughly the same.
Notice that the percentage values for Mar 2020 / Feb 2020 aren't uniform. Maybe a map can shed some light on the differences:
# --- Bar chart: March passengers relative to February, 2019 vs 2020 ---------
def _pct(numerator, denominator):
    """Return ``numerator`` divided by ``denominator``, scaled to percent."""
    return numerator.div(denominator) * 100


feb19, mar19 = passenger_df.På_201902, passenger_df.På_201903
feb20, mar20 = passenger_df.På_202002, passenger_df.På_202003

dif_df = pd.DataFrame()
# Relative change from February to March within each year.
dif_df["dif_2019"] = _pct(mar19 - feb19, feb19)
dif_df["dif_2020"] = _pct(mar20 - feb20, feb20)
# March as a percentage of February within each year.
dif_df["per_2019"] = _pct(mar19, feb19)
dif_df["per_2020"] = _pct(mar20, feb20)
# Relative drop from 2019 to 2020 for the same month.
dif_df["per_feb"] = _pct(feb19 - feb20, feb19)
dif_df["per_mar"] = _pct(mar19 - mar20, mar19)

# Bokeh data source and the categorical x-axis factors taken from it.
source = ColumnDataSource(dif_df)
x_range = [str(h) for h in source.data['Kommune']]

# Every second entry of the 8-colour Category20 palette.
colors = bokeh.palettes.Category20[8][0::2]

p = figure(
    x_range=FactorRange(factors=x_range),
    title='Passengers in march in relation to february in 2019 and 2020',
    x_axis_label='Municipalities',
    y_axis_label='% passengers in March in relation to February',
    plot_width=950,
    plot_height=400,
    toolbar_location=None,
)

# Columns to plot and their legend texts.
years = ["per_2019", "per_2020"]
legend_names = {"per_2019": "Mar 2019 / Feb 2019",
                "per_2020": "Mar 2020 / Feb 2020"}

# One bar renderer per year, hidden until selected in the legend.
bar = {}
for indx, i in enumerate(years):
    bar[i] = p.vbar(
        "Kommune",
        top=i,
        source=source,
        width=0.8,
        fill_alpha=0.5,
        line_color=None,
        color=colors[indx],
        visible=False,
    )
p.xaxis.major_label_orientation = math.pi / 4

# Legend placed to the right of the plot area.
items = [(legend_names[i], [bar[i]]) for i in years]
legend = Legend(items=items, location=(0, 198))
p.add_layout(legend, 'right')

# One hover tool per renderer so each tooltip reads its own column.
for b in bar:
    period = legend_names[b]
    hover = HoverTool(
        tooltips=[('% passengers in March in relation to February: ', f'@{{{b}}}{{0.2f}}')],
        renderers=[bar[b]],
    )
    p.add_tools(hover)

p.legend.click_policy = "hide"
p.legend.title = 'Click on names below to see barcharts'
show(p)
The map shows the percentage decrease from February 2020 to March 2020. Surprisingly, the municipality with the biggest decrease is Albertslund followed by Copenhagen (København) and Frederiksberg.
# --- Choropleth: percentage decrease in boarding passengers, Feb -> Mar 2020 -
from branca.colormap import linear
from folium.features import DivIcon

# Values in the file are multiplied by 100 for display as percentages.
df = pd.read_csv(os.path.join(path, "passenger_decrease.csv"), index_col="Kommune") * 100

# Municipality polygons. The file is opened explicitly as UTF-8 rather than
# with the platform default encoding, because the KOMNAVN values are looked
# up against municipality names that may be non-ASCII.
with open(os.path.join(path, "big_cph_lon_lat.json"), encoding="utf-8") as f:
    big_cph = json.load(f)

colormap = linear.YlOrRd_04.scale(df["pct_drop_2020"].min(), df["pct_drop_2020"].max())
df_dict = df["pct_drop_2020"]

# The original cell built a second map with zoom_start=11 first and discarded
# it unused; only this map was ever displayed.
mun_map = folium.Map(location=[55.676098, 12.568337], zoom_start=10.5, tiles='OpenStreetMap')


def _passenger_drop_style(feature, colormap=colormap, values=df_dict):
    """Style one GeoJSON feature by its municipality's passenger decrease.

    ``colormap`` and ``values`` are bound as defaults on purpose: the style
    function may be called after this cell has finished, and other cells
    rebind the module-level ``colormap`` / ``df_dict``.
    """
    return {
        'fillColor': colormap(values[feature['properties']['KOMNAVN']]),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 1,
    }


folium.GeoJson(
    big_cph,
    name='Percentage decrease for boarding passengers',
    style_function=_passenger_drop_style,
).add_to(mun_map)
folium.LayerControl(position="bottomright", collapsed=False).add_to(mun_map)
# Caption corrected: the plotted column is pct_drop_2020, i.e. the decrease
# from February 2020 to March 2020 (it previously read "march 2019 to march 2020").
colormap.caption = 'Percentage decrease from February 2020 to March 2020'
colormap.add_to(mun_map)

# Label positions as [lat, lon]. They are paired positionally with `sel_mun`,
# which this cell does not define - it must already exist from an earlier cell.
coordinates = [[55.693587, 12.344985], [55.734238, 12.364346],
               [55.653268, 12.418906], [55.592974, 12.645970],
               [55.682220, 12.514292], [55.753021, 12.552256],
               [55.747834, 12.472433], [55.671153, 12.398190],
               [55.735531, 12.434457], [55.634567, 12.465933],
               [55.665301, 12.266118], [55.623098, 12.330763],
               [55.711402, 12.533741], [55.682886, 12.448000],
               [55.610549, 12.600898], [55.636898, 12.368548]]
coordinates_dict = dict(zip(sel_mun, coordinates))

# Text-only markers: a DivIcon holding the municipality name.
for m in sel_mun:
    folium.map.Marker(
        coordinates_dict[m],
        icon=DivIcon(
            icon_size=(150, 36),
            icon_anchor=(25, 0),
            html=f'<div style="font-size: 10pt">{m}</div>',
        ),
    ).add_to(mun_map)

mun_map
# --- Choropleth of the Feb -> Mar 2020 passenger decrease -------------------
# Uses `df`, `big_cph` and `sel_mun`, none of which are defined in this cell.
df_dict = df["pct_drop_2020"]
colormap = linear.YlOrRd_04.scale(df["pct_drop_2020"].min(), df["pct_drop_2020"].max())

mun_map = folium.Map(
    location=[55.676098, 12.568337],
    zoom_start=10.5,
    tiles='OpenStreetMap',
)


def _fill_by_passenger_drop(feature):
    """Return the style dict for one feature, coloured by its KOMNAVN's value."""
    municipality = feature['properties']['KOMNAVN']
    return {
        'fillColor': colormap(df_dict[municipality]),
        'color': 'black',
        'weight': 1,
        'fillOpacity': 1,
    }


choropleth = folium.GeoJson(
    big_cph,
    name='Percentage decrease for boarding passengers',
    style_function=_fill_by_passenger_drop,
)
choropleth.add_to(mun_map)

layer_control = folium.LayerControl(position="bottomright", collapsed=False)
layer_control.add_to(mun_map)

colormap.caption = 'Percentage decrease from February 2020 to March 2020'
colormap.add_to(mun_map)

# Label positions as [lat, lon], paired positionally with `sel_mun`.
coordinates = [
    [55.693587, 12.344985], [55.734238, 12.364346],
    [55.653268, 12.418906], [55.592974, 12.645970],
    [55.682220, 12.514292], [55.753021, 12.552256],
    [55.747834, 12.472433], [55.671153, 12.398190],
    [55.735531, 12.434457], [55.634567, 12.465933],
    [55.665301, 12.266118], [55.623098, 12.330763],
    [55.711402, 12.533741], [55.682886, 12.448000],
    [55.610549, 12.600898], [55.636898, 12.368548],
]
coordinates_dict = dict(zip(sel_mun, coordinates))

# Text-only markers: a DivIcon holding the municipality name.
for m in sel_mun:
    name_icon = DivIcon(
        icon_size=(150, 36),
        icon_anchor=(25, 0),
        html=f'<div style="font-size: 10pt">{m}</div>',
    )
    folium.map.Marker(coordinates_dict[m], icon=name_icon).add_to(mun_map)

mun_map
We have now seen that there's been an increase in speed and a decrease in passengers during the Covid-19 lockdown. But can these results be connected to the demographics of each municipality?
One way to explore the variation between municipalities is by using a scatter plot with the explanatory variable on the x-axis and the dependent variable on the y-axis.
This allows us to quantify the strength of the relationship between the public transport changes (speed and passengers) and the municipality demographics.
Below is shown four scatter plots with the percentage increase in speed as a function of the following four demographic variables:
Note: The regression is based on a low number of data points, therefore this regression approach only gives an indication of possible relationships.
Try to look at the four graphs and consider if some of the variables can explain the increase in average speed. You can hover over the data points for details. You can also mark a data point and it will highlight the selected municipality on the other graphs.
Can you find Høje-Taastrup on all the graphs? How well does it fit the linear regression line?
Also, have you noticed that you can make the x-axis for Density logarithmic? Can you see what it does to the graph if you click on it? Exactly, the points fall closer to a straight line. That means the relationship between Density and % increase in speed appears to be logarithmic! Can you see if there appear to be linear relationships on the other graphs?
# --- Scatter plots: speed increase vs. four demographic variables -----------
df = pd.read_csv(os.path.join(path, "allVariables.csv"), index_col="Municipality")
df["log_Density"] = np.log(df["Density"])
df = df[["Density", "DagNatRatio", "pct_highly_educated", "pct_not_vestern", "log_Density", "speed_change"]]

# Scale these columns by 100 and round to two decimals for display.
for pct_col in ("DagNatRatio", "speed_change", "pct_highly_educated", "pct_not_vestern"):
    df[pct_col] = round(df[pct_col] * 100, 2)

# Fit speed_change against every column (one univariate fit per column) and
# store the fitted values as "prediction_<column>".
linear_regressor = LinearRegression()
df_temp = pd.DataFrame()
# Ten colours: every second entry of the 20-colour Category20 palette.
color = bokeh.palettes.Category20[20][::2]
Y = df.speed_change.to_numpy().reshape(-1, 1)
for x in df.columns:
    X = df[x].to_numpy().reshape(-1, 1)
    linear_regressor.fit(X, Y)
    Y_pred = linear_regressor.predict(X)
    df_temp["prediction_" + x] = list(Y_pred.squeeze())
df_temp = df_temp.set_index(df.index)
df = df.join(df_temp, on="Municipality")

# One shared data source for all figures.
source = ColumnDataSource(data=df)

var_dict = {"Density": " Density (persons/km^2)",
            "DagNatRatio": "Day/Night %",
            "pct_highly_educated": "% with high education",
            "pct_not_vestern": "% with non-vestern background",
            "log_Density": "Log Density (persons/km^2)"}

f = []
line_toggles = []
r2_toggles = []
for i, var in enumerate(df.columns[:5]):
    name = var_dict[var]
    TOOLTIPS = [("Municipality: ", "@Municipality"),
                (name, f"@{var}"),
                ("Increase in speed: ", "@speed_change{0.2f}")]

    fig = figure(
        plot_width=400,
        plot_height=350,
        title=f'Speed increase vs. {name}',
        x_axis_label=f'{name}',
        y_axis_label='% increase in speed',
        background_fill_color="white",
        background_fill_alpha=0.8,
        border_fill_color="white",
        border_fill_alpha=0.8,
        tooltips=TOOLTIPS,
        x_axis_type="linear",
        toolbar_location=None,
        tools="box_select",
    )

    # Squared correlation between the target and this variable, hidden by default.
    cor = df[["speed_change", var]].corr()
    R_sqrt = round(cor.iloc[0, 1] ** 2, 3)
    R_sqrt_Label = Label(
        x=200, y=5, x_units='screen', y_units='screen',
        text=f' R^2 value: {R_sqrt} ', render_mode='canvas',
        border_line_color='black', border_line_alpha=1.0,
        background_fill_color='white', background_fill_alpha=1.0,
        text_font_size='12pt', visible=False,
    )

    fig.circle(x=var, y='speed_change', size=10, source=source, color=color[i])

    # Fitted line, hidden until its toggle is activated.
    predict_name = "prediction_" + var
    line = fig.line(x=var, y=predict_name, source=source, color="grey", line_width=2, visible=False)
    fig.add_layout(R_sqrt_Label)

    # Each toggle's `active` state drives the `visible` property of its target.
    toggle1 = Toggle(label="Show regression line", button_type="default", active=False, max_width=400)
    toggle1.js_link('active', line, 'visible')
    toggle2 = Toggle(label="Show R^2 value", button_type="default", active=False, width=400)
    toggle2.js_link('active', R_sqrt_Label, 'visible')

    f.append(fig)
    line_toggles.append(toggle1)
    r2_toggles.append(toggle2)

# Same layout as a ten-slot list: line toggles in 0-4, R^2 toggles in 5-9.
toggle = line_toggles + r2_toggles

toggles1, toggles2, toggles3, toggles4, toggles5 = (
    column(toggle[k], toggle[k + 5]) for k in range(5)
)

# Density gets two tabs (raw and log-transformed), with matching toggle tabs.
tab1 = Tabs(tabs=[Panel(child=f[0], title="Linear x-axis"),
                  Panel(child=f[4], title="Logarithmic x-axis")])
tab2 = Tabs(tabs=[Panel(child=toggles1, title="Linear"),
                  Panel(child=toggles5, title="Log")])

l = layout([
    [tab1, f[1]],
    [tab2, toggles2],
    [f[2], f[3]],
    [toggles3, toggles4],
])
show(l)
Sometimes it is helpful to use linear regression to see how well a straight line fits the data. In other words, we want to see how well we can predict the percentage change in speed from our 4 variables. Try to click on the 4 buttons Show regression line and see how the lines fit the data. As suspected, there is a positive linear relationship with log(Density) and a negative linear relationship with % with non-vestern background, while there is only a small correlation with % with high education and with Day/Night %.
However, just because the slope of the line is negative or positive, it does not necessarily mean that the line predicts the data well. A helpful measure is $R^2$, a value between 0 and 1 which basically measures how well the line explains the variable y (the % change in speed in our case), where 0 denotes that it does not explain the data at all and 1 denotes that it fits the data perfectly. Try to click on Show R^2 value and see how well the regression lines fit the data.
The highest $R^2$ value is for $log(Density)$ on 0.338 which means that 33.8% of the change in speed can be explained by the population density of the municipality. Notice, that the $R^2$ value lowers to 0.237 if we don't take the logarithm.
Taking into consideration that there are probably many factors that weigh in on the % increase in speed, an $R^2$ of 0.338 is pretty good! It also makes sense intuitively, because densely populated cities usually have more traffic jams. That the relationship is logarithmic means that, at some point, a further increase in population density won't have the same effect on the % increase in speed.
The lowest $R^2$ value is 0.0 for Day/Night %, which is somewhat surprising. Maybe it is because the use of vehicles is rarely restricted to only one municipality, so when people work outside their municipality their car also affects the traffic in other municipalities.
How do these explanatory demographic variables describe the decrease in average passengers?
Do any of the explanatory variables have a stronger linear relationship with the decrease in average passengers than with the increase in average speed? Try to click on Show regression line and Show R^2 value to see if that is the case.
The Day/Night % has the highest $R^2$ value and explains 37.8% of the decrease in passengers.
This indicates that the % decrease in passengers is depending on how many people study or work outside the municipality. It also appears by looking at the $R^2$ value that Density can have an effect on % decrease in passengers. However, $R^2$ is not a perfect measure and most values are vertically clustered to the left with a few exceptions to the right, stressing the fact that this regression analysis is based upon very few data points.
# --- Scatter plots: passenger decrease vs. four demographic variables -------
df = pd.read_csv(os.path.join(path, "allVariables.csv"), index_col="Municipality")
df["log_Density"] = np.log(df["Density"])
df = df[["Density", "DagNatRatio", "pct_highly_educated", "pct_not_vestern", "log_Density", "passenger_change"]]

# Scale these columns by 100 and round to two decimals for display.
df["DagNatRatio"] = round(df["DagNatRatio"] * 100, 2)
df["passenger_change"] = round(df["passenger_change"] * 100, 2)
df["pct_highly_educated"] = round(df["pct_highly_educated"] * 100, 2)
df["pct_not_vestern"] = round(df["pct_not_vestern"] * 100, 2)

# Axis/tooltip text per explanatory variable. DagNatRatio has been scaled to
# percent above, so it is labelled "Day/Night %" (it previously read
# "Day/Night Ratio", which did not match the plotted values).
var_dict = {"Density": " Density (persons/km^2)",
            "DagNatRatio": "Day/Night %",
            "pct_highly_educated": "% with high education",
            "pct_not_vestern": "% with non-vestern background",
            "log_Density": "Log Density (persons/km^2)"}

# The first five columns are the explanatory variables; the sixth is the target.
explanatory_vars = list(df.columns[:5])

# One univariate linear fit per explanatory variable; fitted values are stored
# as "prediction_<variable>". The target itself is no longer fitted against
# itself, since that column was never plotted.
linear_regressor = LinearRegression()
df_temp = pd.DataFrame()
# Ten colours: every second entry of the 20-colour Category20 palette.
color = bokeh.palettes.Category20[20][::2]
Y = df.passenger_change.to_numpy().reshape(-1, 1)
for x in explanatory_vars:
    X = df[x].to_numpy().reshape(-1, 1)
    linear_regressor.fit(X, Y)
    Y_pred = list(linear_regressor.predict(X).squeeze())
    df_temp["prediction_" + x] = Y_pred
df_temp = df_temp.set_index(df.index)
df = df.join(df_temp, on="Municipality")

# One shared data source for all figures.
source = ColumnDataSource(data=df)
f = [0] * 5
toggle = [0] * 10  # slots 0-4: regression-line toggles, 5-9: R^2 toggles

for i, var in enumerate(explanatory_vars):
    name = var_dict[var]
    TOOLTIPS = [("Municipality: ", "@Municipality"),
                (name, f"@{var}"),
                ("Decrease in passengers: ", "@passenger_change{0.2f}")]

    f[i] = figure(plot_width=400, plot_height=350,
                  title=f'Passenger decrease vs. {name}',
                  x_axis_label=f'{name}',
                  y_axis_label='% decrease in passengers',
                  background_fill_color="white", background_fill_alpha=0.8,
                  border_fill_color="white", border_fill_alpha=0.8, tooltips=TOOLTIPS,
                  x_axis_type="linear",
                  toolbar_location=None,
                  tools="box_select",
                  )

    # Squared correlation between the target and this variable, hidden by default.
    cor = df[["passenger_change", var]].corr()
    R_sqrt = round(cor.iloc[0, 1] ** 2, 3)
    R_sqrt_Label = Label(x=200, y=5, x_units='screen', y_units='screen',
                         text=f' R^2 value: {R_sqrt} ', render_mode='canvas',
                         border_line_color='black', border_line_alpha=1.0,
                         background_fill_color='white', background_fill_alpha=1.0,
                         text_font_size='12pt', visible=False,
                         )

    f[i].circle(x=var, y='passenger_change', size=10, source=source, color=color[i])

    # Fitted line, hidden until its toggle is activated.
    predict_name = "prediction_" + var
    line = f[i].line(x=var, y=predict_name, source=source, color="grey", line_width=2, visible=False)
    f[i].add_layout(R_sqrt_Label)

    # Each toggle's `active` state drives the `visible` property of its target.
    toggle1 = Toggle(label="Show regression line", button_type="default", active=False, max_width=400)
    toggle1.js_link('active', line, 'visible')
    toggle2 = Toggle(label="Show R^2 value", button_type="default", active=False, width=400)
    toggle2.js_link('active', R_sqrt_Label, 'visible')
    toggle[i] = toggle1
    toggle[i + 5] = toggle2

# Pair each figure's two toggles in a column.
toggles1 = column(toggle[0], toggle[5])
toggles2 = column(toggle[1], toggle[6])
toggles3 = column(toggle[2], toggle[7])
toggles4 = column(toggle[3], toggle[8])
toggles5 = column(toggle[4], toggle[9])

# Density gets two tabs (raw and log-transformed), with matching toggle tabs.
tab1 = Panel(child=f[0], title="Linear x-axis")
tab2 = Panel(child=f[4], title="Logarithmic x-axis")
tab3 = Panel(child=toggles1, title="Linear")
tab4 = Panel(child=toggles5, title="Log")
tab1 = Tabs(tabs=[tab1, tab2])
tab2 = Tabs(tabs=[tab3, tab4])

l = layout([[tab1, f[1]],
            [tab2, toggles2],
            [f[2], f[3]],
            [toggles3, toggles4]],
           )
show(l)
Do you remember that Albertslund stood out on the map before? Try to find it on the scatter plot and mark it by clicking and dragging to see where it is on the other scatter plots. Do you think the variables explain Albertslund or does it appear to be an outlier?
It appears that none of the variables explain why Albertslund stands out. This can be due to some special conditions that only apply to Albertslund. However, Albertslund seemingly affects the fitting of the line substantially because we have few observations.
For fun, let's say we want to predict the decrease in passengers for all municipalities except Albertslund and see how well the lines fit the data, which can be seen on the graphs below.
# Reload the municipality table and rebuild the regression inputs, this time
# with Albertslund removed so its influence on the fitted lines can be judged.
df = pd.read_csv(os.path.join(path, "allVariables.csv"), index_col="Municipality")
df["log_Density"] = np.log(df["Density"])

# .copy() makes the column subset an independent frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[["Density", "DagNatRatio", "pct_highly_educated", "pct_not_vestern",
         "log_Density", "passenger_change"]].copy()

# Express the fractional columns as percentages with two decimals.
pct_columns = ["DagNatRatio", "passenger_change", "pct_highly_educated", "pct_not_vestern"]
df[pct_columns] = (df[pct_columns] * 100).round(2)

df = df[df.index != "Albertslund"]

# Calculating linear regression lines #################################################################################
linear_regressor = LinearRegression()

# Every second colour of Category20; one colour per scatter plot.
color = bokeh.palettes.Category20[20][::2]

# The regression target is the same for every predictor, so build it once.
target = df["passenger_change"].to_numpy().reshape(-1, 1)

# One single-predictor regression per column; the fitted values are stored as
# "prediction_<column>". The loop covers every column of df, so the target
# itself is included as well, exactly as before.
predictions = {}
for column_name in df.columns:
    predictor = df[column_name].to_numpy().reshape(-1, 1)
    linear_regressor.fit(predictor, target)
    # ravel() always yields a 1-D array, even when only one row is left.
    predictions["prediction_" + column_name] = linear_regressor.predict(predictor).ravel()

# Index the predictions like df so the join lines rows up by municipality.
df_temp = pd.DataFrame(predictions, index=df.index)
df = df.join(df_temp)
########### Create figure #########################################################################################################
# All five scatter plots read from one shared source, so a box selection made
# in one plot is reflected in the others.
source = ColumnDataSource(data=df)

# Column name -> label shown in titles, axis labels and tooltips.
var_dict = {
    "Density": " Density (persons/km^2)",
    "DagNatRatio": "Day/Night Ratio",
    "pct_highly_educated": "% with high education",
    "pct_not_vestern": "% with non-vestern background",
    "log_Density": "Log Density (persons/km^2)",
}

f = []             # one figure per explanatory variable
line_toggles = []  # "Show regression line" buttons, in figure order
r2_toggles = []    # "Show R^2 value" buttons, in figure order

for colour_idx, var in enumerate(df.columns[:5]):
    label_text = var_dict[var]
    hover_rows = [
        ("Municipality: ", "@Municipality"),
        (label_text, f"@{var}"),
        ("Decrease in passengers: ", "@passenger_change{0.2f}"),
    ]

    fig = figure(
        plot_width=400,
        plot_height=350,
        title=f'Passenger decrease vs. {label_text}',
        x_axis_label=f'{label_text}',
        y_axis_label='% decrease in passengers',
        background_fill_color="white",
        background_fill_alpha=0.8,
        border_fill_color="white",
        border_fill_alpha=0.8,
        tooltips=hover_rows,
        x_axis_type="linear",
        toolbar_location=None,
        tools="box_select",
    )

    # R^2 of the single-variable fit: the squared correlation between the
    # passenger change and the explanatory variable, rounded to 3 decimals.
    corr_matrix = df[["passenger_change", var]].corr()
    r_squared = round(corr_matrix.loc["passenger_change", var] ** 2, 3)
    r2_label = Label(
        x=200, y=5, x_units='screen', y_units='screen',
        text=f' R^2 value: {r_squared} ',
        render_mode='canvas',
        border_line_color='black', border_line_alpha=1.0,
        background_fill_color='white', background_fill_alpha=1.0,
        text_font_size='12pt',
        visible=False,
    )

    # Observations first, then the (initially hidden) regression line and label.
    fig.circle(x=var, y='passenger_change', size=10, source=source,
               color=color[colour_idx])
    fitted_line = fig.line(x=var, y="prediction_" + var, source=source,
                           color="grey", line_width=2, visible=False)
    fig.add_layout(r2_label)

    # Each button's "active" state drives the "visible" property of its target.
    show_line = Toggle(label="Show regression line", button_type="default",
                       active=False, max_width=400)
    show_line.js_link('active', fitted_line, 'visible')
    show_r2 = Toggle(label="Show R^2 value", button_type="default",
                     active=False, width=400)
    show_r2.js_link('active', r2_label, 'visible')

    f.append(fig)
    line_toggles.append(show_line)
    r2_toggles.append(show_r2)

# Same layout as a flat list: slots 0-4 are line toggles, 5-9 are R^2 toggles.
toggle = line_toggles + r2_toggles

# Stack the two buttons that belong to each figure.
toggles1, toggles2, toggles3, toggles4, toggles5 = (
    column(line_btn, r2_btn) for line_btn, r2_btn in zip(line_toggles, r2_toggles)
)

# The first and fifth figures (and their buttons) sit behind tabs; the
# remaining figures are laid out directly in the grid.
figure_panels = [
    Panel(child=f[0], title="Linear x-axis"),
    Panel(child=f[4], title="Logarithmic x-axis"),
]
toggle_panels = [
    Panel(child=toggles1, title="Linear"),
    Panel(child=toggles5, title="Log"),
]
tab1 = Tabs(tabs=figure_panels)
tab2 = Tabs(tabs=toggle_panels)

l = layout([
    [tab1, f[1]],
    [tab2, toggles2],
    [f[2], f[3]],
    [toggles3, toggles4],
])
show(l)
Now all regression lines explain at least 21% of the variation in the decrease of passengers, and % with high education goes from explaining 16.6% to 42.0%! One possible interpretation is that people with a high education have a better possibility to work or study from home and that there are specific things going on in Albertslund that make it deviate from the rest. However, what it shows most of all is how sensitive linear regression is when you have few data points, and therefore we stress that the correlations we find can at best be interpreted as an indication.
So far, we have gotten a better understanding of how a single variable alone affects the % speed and passenger change in the municipalities. However, we do not understand how the variables put together explain the % change. We also add some more variables, namely % unemployed, disposable income and median age, which gives 7 variables in total. We cannot plot in more than 3D so we have to be smart about it. Luckily, Karl Pearson came up with a smart technique back in 1901 called Principal Component Analysis.
We won't go into details of how it works, but basically each Principal Component (PC) shows how variables explain variations in the data. So, if you click on PC1 for % speed increase on the graph below, we can tell you that if you have a municipality with the combination of above-average population density, above-average unemployment, above-average education level and below-average % of non-western immigrants and median age, then it is likely the increase in speed will be high. How could we tell that, you might wonder?
Imagine that you have a checkbox in front of you where you can give a checkmark for each variable and the more you can check off, the more likely it is that there is a high increase in speed or high decrease in passengers.
You need to know 3 things to be able to 'check the boxes off':
Now we have a challenge for you. Click on PC3 for passenger decrease and answer this question. If we have a municipality with below average population density, above average of people present both day and night, below average unemployment and education level and below average median age, is the municipality then likely to have a large or small decrease in passengers? To help you keep track we have made a checkbox.
## Prepare data ##################################################################################
# Demographic variables that enter the PCA together with one change variable.
variables = [
    "log_Density", "DagNatRatio", "pct_unemployed", "pct_highly_educated",
    "pct_not_vestern", "income", "median_age",
]
change_variables = ["speed_change", "passenger_change"]
var_dict = dict(zip(range(9), ["log Density", "Day/Night %", "% unemployed", "% highly educated", "% non-vestern",
                               "Disposable income", "Median age", "% increase in speed", "% decrease in passengers"]))
sel_mun = [
    "Albertslund",
    "Ballerup", "Brøndby", "Dragør", "Frederiksberg", "Gentofte", "Gladsaxe",
    "Glostrup", "Herlev", "Hvidovre", "Høje-Taastrup", "Ishøj", "København",
    "Rødovre", "Tårnby", "Vallensbæk"]

df = pd.read_csv(os.path.join(path, "allVariables.csv"), sep=",", index_col=0)
df["log_Density"] = np.log(df["Density"])

change_dict = {"speed_change": "Speed increase (%)", "passenger_change": "Passenger decrease (%)"}
PCs = ["PC1", "PC2", "PC3", "PC4"]
p = {}  # one bar-chart figure per change variable

for c in change_variables:
    # Standardise the seven variables plus the change variable and fit a
    # four-component PCA on them.
    feature_names = variables + [c]
    x = StandardScaler().fit_transform(df[feature_names].values)
    df_std = pd.DataFrame(data=x, columns=feature_names)
    pca = PCA(n_components=4)
    PC = pca.fit_transform(df_std)

    # Coefficient table: one row per variable, one column per component.
    PC_4D = pd.DataFrame(pca.components_, columns=feature_names, index=PCs).T

    ## Create plot ###################################################################################
    source = ColumnDataSource(PC_4D)
    # Variable names act as the categorical x-axis of the bar chart.
    x_range = [str(label) for label in source.data['index']]
    # Every second colour of the 18-colour Category20 palette.
    colors = bokeh.palettes.Category20[18][0::2]

    # Figure frame with title and axis labels; the toolbar is disabled.
    p[c] = figure(
        x_range=FactorRange(factors=x_range),
        title=f'{change_dict[c]} Principal Component Coefficients',
        x_axis_label='Variables',
        y_axis_label='Coefficients',
        plot_width=500,
        plot_height=400,
        toolbar_location=None,
    )

    # One hidden set of bars per principal component; the legend switches them on.
    bar = {}
    for pc_name, pc_colour in zip(PCs, colors):
        bar[pc_name] = p[c].vbar(
            "index", top=pc_name, source=source, width=0.5,
            fill_alpha=0.5, line_color=None,
            color=pc_colour, visible=False,
        )
    p[c].xaxis.major_label_orientation = math.pi/4

    # Legend entries pair each component name with its bars; clicking an
    # entry toggles the visibility of that component.
    items = [(pc_name, [bar[pc_name]]) for pc_name in PCs]
    legend = Legend(items=items, location=(0, 125))
    p[c].add_layout(legend, "right")
    p[c].legend.click_policy = "hide"
    p[c].legend.title = 'Show PC'

show(row(p["speed_change"], p["passenger_change"]))

# Unchecked checkbox list of the variables, to help the reader keep track.
checkbox_group = CheckboxGroup(labels=variables)
show(checkbox_group)
Click here to see the answer.
Actually, the Principal Components (PCs) are axes in a coordinate system and the coefficients shown above control how low (-) or how high (+) the municipalities land on the PCs. The PCs ensure that there is as much variation as possible, which means we can see what sets the municipalities apart in terms of increase in speed and decrease in passengers. The plots are color coded so the darker the green, the higher the change.
Try to hover over the darkest circle on the plot to the left and you will see that it is Rødovre with the highest increase in speed and that it scores positively on PC1 and negatively on PC3. Then try to hover over the lightest circle and you will see that it is Høje-Taastrup with the lowest increase in speed, which is negative on PC1 and positive on PC3.
Now notice how the colors generally are light on one side of the dashed line and generally dark on the other side of the dashed line. This means that we simply by looking at where they land on the PC's can determine if the increase in speed or decrease in passengers is likely to become high or low!
############ prepare data ##############################################################################################
from branca.colormap import linear
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from bokeh.layouts import layout, gridplot, row, column, widgetbox
%matplotlib inline
# Demographic variables that enter the PCA together with one change variable.
variables = ["log_Density", "DagNatRatio", "pct_unemployed", "pct_highly_educated",
             "pct_not_vestern", "income", "median_age",
             ]
change_variables = ["speed_change", "passenger_change"]
change_dict = {"speed_change": "Speed increase (%)", "passenger_change": "Passenger decrease (%)"}
# Kept for reference: the municipalities covered by the analysis. Row labels
# below come from the data file itself rather than from this list.
sel_mun = [
    "Albertslund",
    "Ballerup", "Brøndby", "Dragør", "Frederiksberg", "Gentofte", "Gladsaxe",
    "Glostrup", "Herlev", "Hvidovre", "Høje-Taastrup", "Ishøj", "København",
    "Rødovre", "Tårnby", "Vallensbæk"]

df = pd.read_csv(os.path.join(path, "allVariables.csv"), sep=",", index_col=0)
df["log_Density"] = np.log(df["Density"])

# Label the PCA rows with the index of the file they were computed from.
# A hard-coded name list would silently attach scores to the wrong
# municipality if the row order of the CSV ever differed from it.
municipalities = df.index.tolist()
pc_columns = ["PC1", "PC2", "PC3", "PC4"]

# Reversed YlGn palette for the colour mapper; the accompanying text reads
# darker green as a higher change.
color = bokeh.palettes.YlGn[9][::-1]

# Defined here so the cell does not depend on an earlier cell having created it.
p = {}

for c in change_variables:
    # Standardise the variables plus the change variable and project the
    # municipalities onto the first four principal components.
    columns = variables + [c]
    x = StandardScaler().fit_transform(df[columns].values)
    df_std = pd.DataFrame(data=x, columns=columns, index=municipalities)
    pca = PCA(n_components=4)
    PC_color = pca.fit_transform(df_std.copy())
    df_color = pd.DataFrame(PC_color, index=municipalities, columns=pc_columns)

    # Change in percent; also the field that drives the fill colour.
    df_color[c] = df[c] * 100
    exp_cmap = LinearColorMapper(palette=color,
                                 high=max(df_color[c]),
                                 low=min(df_color[c]))

    # For the tooltips: is each standardised variable above or below its mean?
    # The change variable itself is left out.
    df_above_below = pd.DataFrame(
        np.where(df_std[variables] > 0, "Above", "Below"),
        index=municipalities,
        columns=variables,
    )
    df_color = df_color.join(df_above_below)
    # The tooltips refer to the municipality as "@index".
    df_color.index.name = "index"

    ########### Create figure #####################################################################
    source = ColumnDataSource(data=df_color)
    TOOLTIPS = [("Municipality: ", "@index"),
                ("% change", "@{%s}" % c),
                ("log Density", "@log_Density"),
                ("Day/Night", "@DagNatRatio"),
                ("Unemployment", "@pct_unemployed"),
                ("Education", "@pct_highly_educated"),
                ("Non-vestern", "@pct_not_vestern"),
                ("Income", "@income"),
                ("Median age", "@median_age")]

    p[c] = figure(plot_height=450, plot_width=450,
                  title=f'{change_dict[c]}: PC1 and PC3',
                  x_axis_label='Principal Component 1',
                  y_axis_label='Principal Component 3',
                  background_fill_color="white", background_fill_alpha=0.8,
                  border_fill_color="white", border_fill_alpha=0.8, tooltips=TOOLTIPS,
                  toolbar_location=None,
                  y_range=(-2.5, 2.7)
                  )

    # One circle per municipality, filled according to its change value.
    p[c].circle(x="PC1", y='PC3', size=15, source=source,
                fill_color={"field": c, "transform": exp_cmap},
                line_alpha=1, line_color="black")
    colorbar = ColorBar(color_mapper=exp_cmap, location=(0, 0))
    p[c].add_layout(colorbar, "right")
    # Hand-placed dashed guide line that the text uses to separate low from high changes.
    p[c].line([-2.5, 4.5], [-2, 2.1], line_color="grey", line_dash="dashed")

show(row(p["speed_change"], p["passenger_change"]))
So do you remember Albertslund seemed to be an outlier when doing linear regression? Let us see if we can now explain why it has a high change in passengers. If you hover over the darkest circle on the right plot you will find Albertslund. Albertslund lies almost at 0 on PC1 while it is the lowest point on PC3. So PC3 alone can explain why Albertslund has a high decrease in passengers. Let's look at its values.
Wait a little. They are in fact identical to the municipality described above so you have actually already explained why Albertslund has a high decrease in passengers.
Note: It is important to emphasize that we cannot conclude anything on the basis of 16 data points and they can only give us indications.
Let us summarize what we have learned about public transport during the covid-19 lockdown. The easiest way to do that is to consider the questions we had from the start:
Yes, the average speed on roads has increased during the covid-19 lockdown and yes, how much it has increased differs between municipalities. This indicates that there are fewer vehicles on the roads, and that there are underlying reasons for the increase being different.
Yes, the average number of passengers has decreased during the covid-19 lockdown and yes, the decrease is not the same for each municipality. This indicates that the use of public transport has decreased during the covid-19 lockdown, but it also indicates that there exist underlying reasons for the decrease not being equal for each municipality.
This one is more difficult to answer with confidence since (as you probably already know we will say) we only have 16 municipalities to compare. Therefore, we cannot conclude that the demographics we looked at can explain the different developments in speed and passengers per municipality.
However, we can speculate about what our analysis indicates. The variable that best explains passenger decrease seems to be the Day/Night percentage, which makes sense since people in municipalities with a high Day/Night percentage can probably stay at home more easily or have a short enough distance to take the bike.
Interestingly, Day/Night percentage does not explain the increase in speed ($R^2$ of 0.0); instead it is population density that best explains the increase in speed. The reason for this might be that cars do not drive inside only one municipality. Therefore, the effect of driving less to work in one municipality can be measured in several municipalities. Also, it is interesting to note that the two variables with the highest $R^2$ scores for speed increase are the two with the lowest scores for passenger decrease.
Lastly, the Principal Component Analysis indicates that it is possible to explain the change in speed and passenger decrease if we use a combination of demographic variables. So even though we strictly speaking cannot conclude whether the demographics of the municipalities can explain the difference in change, we at least have indications that they are able to do so in combination.